*************************************************
* Purpose -- Preparing different samples for
* analysis using CBP data as provided in JNR's
* replication files
************************************************
clear all
* RUN 00_path_master.do FIRST TO GET FILEPATHS

************************************************
* CZ-state dataset
************************************************
frame copy default cbp_cz_state
frame change cbp_cz_state

*Load data
use "$raw/jnr-cbp/cbp_czone_state_20ind_sample", clear

*Remove DC (starts in 2004)
drop if czone==11304 & state==11

*Keep restaurant industry
keep if sic87xx == 5800

* Rename key variables
rename (czone state emp ap workagepop max_mw) ///
	   (cz1990 statefips emp75 ap75 pop mw)

* Generate id
egen czstate = group(cz1990 statefips)

* Merge with price index
preserve
	import excel using "$raw/other/r-cpi-u-rs-allitems.xlsx", cellrange(A6) firstrow clear
	rename YEAR - AVG, lower
	keep year avg
	keep if inrange(year,1980,2019)
	
	* reindex to have 2023 as 100
	gen index = (avg/449.3)*100 //449.3 is index for 2023
	replace index = index/100
	
	drop avg
	tempfile cpi_rs
	save `cpi_rs'
restore

merge m:1 year using `cpi_rs', assert(2 3) keep(3) nogen

* Generate Fed MW
gen fed_mw = .
replace fed_mw = 3.10 if year == 1980
replace fed_mw = 3.35 if year >= 1981 & year <=1989
replace fed_mw = 3.80 if year == 1990
replace fed_mw = 4.25 if year >= 1991 & year <= 1995
replace fed_mw = 4.75 if year == 1996
replace fed_mw = 5.15 if year >= 1997 & year <= 2006
replace fed_mw = 5.85 if year == 2007
replace fed_mw = 6.55 if year == 2008
replace fed_mw = 7.25 if year >= 2009

* Create variables of interest
gen lemp75 = ln(emp75)
gen lmw = ln(mw)
gen lpop = ln(pop)

gen avg_earnings = (ap75/emp75)/52.14
gen real_earnings = avg_earnings/index
gen learn75 = ln(real_earnings)

gen lemp_other = ln(totemp - emp75)

gen avg_earnings_other = ((totap - ap75)/(totemp - emp75))/52.14
gen real_earnings_other = avg_earnings_other/index
gen learn_other = ln(real_earnings_other)

* Sort and save
sort czstate year
xtset czstate year

save "$clean/cbp/cz_state_sample.dta", replace

************************************************
* MSCZ pairs dataset
************************************************
frame copy default mscz
frame change mscz

* Load the data
use "$raw/jnr-cbp/cbp_stacked_czonestatepair_sample.dta", clear

* Rename key variables
rename (czonestatepair state emp ap workagepop) ///
	   (pair_id_num statefips emp75 ap75 pop)
	   
* Generate border segment
sort pair_id_num year
 
gen st_min = min(statea, stateb)
gen st_max = max(statea, stateb)

* Generate ids
egen bordersegment = group(st_min st_max)
egen pair_id_czstate = group(czonestate pair_id_num)

* Merge with price index
preserve
	import excel using "$raw/other/r-cpi-u-rs-allitems.xlsx", cellrange(A6) firstrow clear
	rename YEAR - AVG, lower
	keep year avg
	keep if inrange(year,1980,2019)
	
	* Reindex to have 2023 as 100
	gen index = (avg/449.3)*100 //449.3 is index for 2023
	replace index = index/100
	
	drop avg
	tempfile cpi_rs
	save `cpi_rs'
restore

merge m:1 year using `cpi_rs', assert(2 3) keep(3) nogen

* Generate Fed MW
gen fed_mw = .
replace fed_mw = 3.10 if year == 1980
replace fed_mw = 3.35 if year >= 1981 & year <=1989
replace fed_mw = 3.80 if year == 1990
replace fed_mw = 4.25 if year >= 1991 & year <= 1995
replace fed_mw = 4.75 if year == 1996
replace fed_mw = 5.15 if year >= 1997 & year <= 2006
replace fed_mw = 5.85 if year == 2007
replace fed_mw = 6.55 if year == 2008
replace fed_mw = 7.25 if year >= 2009

* Create variables of interest
gen lemp75 = ln(emp75)
gen lmw = ln(mw)
gen lpop = ln(pop)

gen avg_earnings = (ap75/emp75)/52.14
gen real_earnings = avg_earnings/index
gen learn75 = ln(real_earnings)

gen lemp_other = ln(totemp - emp75)

gen avg_earnings_other = ((totap - ap75)/(totemp - emp75))/52.14
gen real_earnings_other = avg_earnings_other/index
gen learn_other = ln(real_earnings_other)

* Sort and save
sort pair_id_czstate year
xtset pair_id_czstate year

save "$clean/cbp/mscz_sample.dta", replace


************************************************
* Border county pairs dataset
************************************************
frame copy default bcp
frame change bcp

* Load data
use "$raw/jnr-cbp/cbp_stacked_countypair_sample", clear

frame copy bcp bcp_default
frame change bcp_default
* Keep only border counties
merge m:1 countypair_id county using "$raw/jnr-cbp/countypairtypes.dta", nogen
keep if type0 == 1
drop type*

forvalues i = 1/4 {
	frame copy bcp bcp_ss`i'
	frame change bcp_ss`i'
	merge m:1 countypair_id county using "$raw/jnr-cbp/countypairtypes.dta", nogen
	keep if type`i' == 1
	drop type*
}

foreach frame in bcp_default bcp_ss1 bcp_ss2 bcp_ss3 bcp_ss4 {
	frame change `frame'
	
	* Rename key variables
	rename (state emp ap workagepop) ///
		   (statefips emp75 ap75 pop)

	* Generate ids
	egen pair_id_num = group(countypair_id)
	egen pair_id_county = group(pair_id county)

	* Generate bordersegment
	gen st_min = min(statea, stateb)
	gen st_max = max(statea, stateb)
	egen bordersegment = group(st_min st_max)

	* Merge with price index
	preserve
		import excel using "$raw/other/r-cpi-u-rs-allitems.xlsx", cellrange(A6) firstrow clear
		rename YEAR - AVG, lower
		keep year avg
		keep if inrange(year,1980,2019)
		
		* reindex to have 2023 as 100
		gen index = (avg/449.3)*100 //449.3 is index for 2023
		replace index = index/100
		
		drop avg
		tempfile cpi_rs
		save `cpi_rs'
	restore

	merge m:1 year using `cpi_rs', assert(2 3) keep(3) nogen

	* Generate Fed MW
	gen fed_mw = .
	replace fed_mw = 3.10 if year == 1980
	replace fed_mw = 3.35 if year >= 1981 & year <=1989
	replace fed_mw = 3.80 if year == 1990
	replace fed_mw = 4.25 if year >= 1991 & year <= 1995
	replace fed_mw = 4.75 if year == 1996
	replace fed_mw = 5.15 if year >= 1997 & year <= 2006
	replace fed_mw = 5.85 if year == 2007
	replace fed_mw = 6.55 if year == 2008
	replace fed_mw = 7.25 if year >= 2009

	* Create variables of interest
	gen lemp75 = ln(emp75)
	gen lmw = ln(mw)
	gen lpop = ln(pop)

	gen avg_earnings = (ap75/emp75)/52.14
	gen real_earnings = avg_earnings/index
	gen learn75 = ln(real_earnings)

	gen lemp_other = ln(totemp - emp75)

	gen avg_earnings_other = ((totap - ap75)/(totemp - emp75))/52.14
	gen real_earnings_other = avg_earnings_other/index
	gen learn_other = ln(real_earnings_other)

	* Sort and save
	sort pair_id_county year
	xtset pair_id_county year
}


frame change bcp_default
save "$clean/cbp/bcp_sample.dta", replace

forvalues i = 1/4 {
	frame change bcp_ss`i'
	save "$clean/cbp/bcp_subsample`i'.dta", replace
}

